# Example taken from:
# Machine Learning
library(tidymodels)
## Registered S3 method overwritten by 'tune':
## method from
## required_pkgs.model_spec parsnip
## ── Attaching packages ────────────────────────────────────── tidymodels 0.1.3 ──
## ✓ broom 0.7.9.9000 ✓ recipes 0.1.16
## ✓ dials 0.0.9 ✓ rsample 0.1.0
## ✓ dplyr 1.0.7 ✓ tibble 3.1.6
## ✓ ggplot2 3.3.5 ✓ tidyr 1.1.3
## ✓ infer 1.0.0 ✓ tune 0.1.6
## ✓ modeldata 0.1.1 ✓ workflows 0.2.3
## ✓ parsnip 0.1.7 ✓ workflowsets 0.1.0
## ✓ purrr 0.3.4 ✓ yardstick 0.0.8
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## x purrr::discard() masks scales::discard()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x recipes::step() masks stats::step()
## • Use tidymodels_prefer() to resolve common conflicts.
library(modeltime)
library(modeltime.ensemble)
## Loading required package: modeltime.resample
library(modeltime.resample)
# Time Series
library(timetk)
# Core
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ✓ stringr 1.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x readr::col_factor() masks scales::col_factor()
## x purrr::discard() masks scales::discard()
## x dplyr::filter() masks stats::filter()
## x stringr::fixed() masks recipes::fixed()
## x dplyr::lag() masks stats::lag()
## x readr::spec() masks yardstick::spec()
# Load data
library(readxl)
# Visualize the colones series before modeling
# (fixed: use TRUE/FALSE, never the reassignable T/F shortcuts)
colones %>%
  plot_time_series(Date, value, .facet_ncol = 3, .interactive = FALSE)
# Forecast horizon: 5 future monthly observations
FORECAST_HORIZON <- 5

# Extend the series with FORECAST_HORIZON empty future rows (value = NA)
full_data_tbl <- colones %>%
  select(Date, value) %>%
  future_frame(
    .date_var   = Date,
    .length_out = FORECAST_HORIZON,
    .bind_data  = TRUE
  )
# Historical portion only: rows that carry an observed value
data_prepared_tbl <- filter(full_data_tbl, !is.na(value))

# Quick range / frequency diagnostics of the prepared series
tk_summary_diagnostics(data_prepared_tbl)
## tk_augment_timeseries_signature(): Using the following .date_var variable: Date
## # A tibble: 1 × 12
## n.obs start end units scale tzone diff.minimum diff.q1 diff.median
## <int> <date> <date> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 246 2001-02-01 2021-07-01 days month UTC 2419200 2592000 2678400
## # … with 3 more variables: diff.mean <dbl>, diff.q3 <dbl>, diff.maximum <dbl>
# Future portion: the empty rows appended by future_frame() (value is NA)
future_tbl <- filter(full_data_tbl, is.na(value))
# Train/test split: hold out the last FORECAST_HORIZON observations.
# Fixed: the original passed `data_var =`, but the argument is `date_var`;
# it only worked because timetk auto-detected the date column.
splits <- data_prepared_tbl %>%
  arrange(Date) %>%
  time_series_split(
    date_var   = Date,
    assess     = FORECAST_HORIZON,
    cumulative = TRUE
  )
## Using date_var: Date
splits
## <Analysis/Assess/Total>
## <241/5/246>
# Recipe 1: calendar-signature features derived from Date; Date itself
# remains a predictor (suits date-aware engines such as ARIMA / prophet).
recipe_spec_1 <- recipe(value ~ ., training(splits)) %>%
  step_timeseries_signature(Date) %>%
  # Remove signature columns that add nothing for monthly data
  step_rm(matches("(.iso$)|(.xts)|(day)|(hour)|(minute)|(second)|(am.pm)|(week)")) %>%
  step_normalize(Date_index.num, Date_year) %>%
  step_mutate(Date_month = factor(Date_month, ordered = TRUE)) %>%
  step_dummy(all_nominal(), one_hot = TRUE)

recipe_spec_1 %>% prep() %>% juice() %>% glimpse()
## Rows: 241
## Columns: 30
## $ Date <date> 2001-02-01, 2001-03-01, 2001-04-01, 2001-05-01, 200…
## $ value <dbl> 12637.44, 13569.26, 11895.00, 12882.29, 13393.90, 11…
## $ Date_index.num <dbl> -1.720507, -1.707312, -1.692703, -1.678565, -1.66395…
## $ Date_year <dbl> -1.656391, -1.656391, -1.656391, -1.656391, -1.65639…
## $ Date_half <int> 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2…
## $ Date_quarter <int> 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1, 1, 1, 2, 2, 2, 3…
## $ Date_month_01 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ Date_month_02 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Date_month_03 <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ Date_month_04 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Date_month_05 <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Date_month_06 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Date_month_07 <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Date_month_08 <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_09 <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_10 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_11 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_12 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_01 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_02 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Date_month.lbl_03 <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ Date_month.lbl_04 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Date_month.lbl_05 <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Date_month.lbl_06 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Date_month.lbl_07 <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Date_month.lbl_08 <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_09 <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_10 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_11 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_12 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
# Recipe 2: same features, but Date is demoted to an "ID" role so that
# engines that cannot consume date columns do not receive it as a predictor.
recipe_spec_2 <- update_role(recipe_spec_1, Date, new_role = "ID")

recipe_spec_2 %>% prep() %>% juice() %>% glimpse()
## Rows: 241
## Columns: 30
## $ Date <date> 2001-02-01, 2001-03-01, 2001-04-01, 2001-05-01, 200…
## $ value <dbl> 12637.44, 13569.26, 11895.00, 12882.29, 13393.90, 11…
## $ Date_index.num <dbl> -1.720507, -1.707312, -1.692703, -1.678565, -1.66395…
## $ Date_year <dbl> -1.656391, -1.656391, -1.656391, -1.656391, -1.65639…
## $ Date_half <int> 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2…
## $ Date_quarter <int> 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1, 1, 1, 2, 2, 2, 3…
## $ Date_month_01 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ Date_month_02 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Date_month_03 <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ Date_month_04 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Date_month_05 <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Date_month_06 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Date_month_07 <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Date_month_08 <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_09 <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_10 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_11 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_12 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_01 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_02 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Date_month.lbl_03 <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ Date_month.lbl_04 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Date_month.lbl_05 <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Date_month.lbl_06 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Date_month.lbl_07 <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Date_month.lbl_08 <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_09 <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_10 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_11 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_12 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
# Variable roles for recipe 1 (Date is still a predictor here)
summary(prep(recipe_spec_1))
## # A tibble: 30 × 4
## variable type role source
## <chr> <chr> <chr> <chr>
## 1 Date date predictor original
## 2 value numeric outcome original
## 3 Date_index.num numeric predictor derived
## 4 Date_year numeric predictor derived
## 5 Date_half numeric predictor derived
## 6 Date_quarter numeric predictor derived
## 7 Date_month_01 numeric predictor derived
## 8 Date_month_02 numeric predictor derived
## 9 Date_month_03 numeric predictor derived
## 10 Date_month_04 numeric predictor derived
## # … with 20 more rows
# Variable roles for recipe 2 (Date now carries the "ID" role)
summary(prep(recipe_spec_2))
## # A tibble: 30 × 4
## variable type role source
## <chr> <chr> <chr> <chr>
## 1 Date date ID original
## 2 value numeric outcome original
## 3 Date_index.num numeric predictor derived
## 4 Date_year numeric predictor derived
## 5 Date_half numeric predictor derived
## 6 Date_quarter numeric predictor derived
## 7 Date_month_01 numeric predictor derived
## 8 Date_month_02 numeric predictor derived
## 9 Date_month_03 numeric predictor derived
## 10 Date_month_04 numeric predictor derived
## # … with 20 more rows
# Boosted auto-ARIMA: ARIMA captures the series, XGBoost models the residuals
wflw_fit_autoarima_boost <- workflow() %>%
  add_recipe(recipe_spec_1) %>%
  add_model(
    arima_boost(min_n = 2, learn_rate = 0.015) %>%
      set_engine(engine = "auto_arima_xgboost")
  ) %>%
  fit(data = training(splits))
## frequency = 12 observations per 1 year
# Plain prophet regression model on recipe 1
wflw_fit_prophet <- workflow() %>%
  add_recipe(recipe_spec_1) %>%
  add_model(prophet_reg() %>% set_engine("prophet")) %>%
  fit(data = training(splits))
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
# XGBoost with learn_rate = 0.015.
# Fixed: the original passed learn_rate = 0.15, contradicting the object
# name `wflw_fit_xgboost_0_015` (this model is excluded from submodels_tbl,
# so downstream results are unaffected).
wflw_fit_xgboost_0_015 <- workflow() %>%
  add_model(
    boost_tree(learn_rate = 0.015) %>% set_engine("xgboost")
  ) %>%
  add_recipe(recipe_spec_2) %>%
  fit(training(splits))
# XGBoost submodel, learning rate 0.1 (Date enters only as an ID via recipe 2)
wflw_fit_xgboost_0_1 <- workflow() %>%
  add_recipe(recipe_spec_2) %>%
  add_model(boost_tree(learn_rate = 0.1) %>% set_engine("xgboost")) %>%
  fit(data = training(splits))
# XGBoost submodel, learning rate 0.3
wflw_fit_xgboost_0_3 <- workflow() %>%
  add_recipe(recipe_spec_2) %>%
  add_model(boost_tree(learn_rate = 0.3) %>% set_engine("xgboost")) %>%
  fit(data = training(splits))
# Random forest submodel, 1000 trees
wflw_fit_rf_1000 <- workflow() %>%
  add_recipe(recipe_spec_2) %>%
  add_model(
    rand_forest(trees = 1000) %>% set_engine("randomForest")
  ) %>%
  fit(data = training(splits))
# Random forest submodel, 500 trees
wflw_fit_rf_500 <- workflow() %>%
  add_recipe(recipe_spec_2) %>%
  add_model(
    rand_forest(trees = 500) %>% set_engine("randomForest")
  ) %>%
  fit(data = training(splits))
# Random forest submodel, 200 trees
wflw_fit_rf_200 <- workflow() %>%
  add_recipe(recipe_spec_2) %>%
  add_model(
    rand_forest(trees = 200) %>% set_engine("randomForest")
  ) %>%
  fit(data = training(splits))
# RBF-kernel support vector machine submodel
wflw_fit_svm <- workflow() %>%
  add_recipe(recipe_spec_2) %>%
  add_model(svm_rbf() %>% set_engine("kernlab")) %>%
  fit(data = training(splits))
# Prophet + XGBoost on residuals; all prophet seasonalities are disabled so
# the boosted trees capture the calendar effects instead.
# Fixed: removed the trailing comma inside prophet_boost() and replaced F
# with FALSE.
wflw_fit_prophet_boost <- workflow() %>%
  add_model(
    prophet_boost(
      seasonality_yearly = FALSE,
      seasonality_weekly = FALSE,
      seasonality_daily  = FALSE
    ) %>%
      set_engine("prophet_xgboost")
  ) %>%
  add_recipe(recipe_spec_1) %>%
  fit(training(splits))
# Collect the fitted submodels into a modeltime table. The commented-out
# entries were tried and deliberately excluded from the final comparison.
submodels_tbl <- modeltime_table(
wflw_fit_autoarima_boost,
#wflw_fit_prophet, #1
wflw_fit_prophet_boost, #2
#wflw_fit_xgboost_0_015, #3
#wflw_fit_xgboost_0_1, #4
wflw_fit_xgboost_0_3, #5
#wflw_fit_rf_1000, #6
wflw_fit_rf_500 #, #7
#wflw_fit_rf_200, #8
#wflw_fit_svm #9
)
submodels_tbl
## # Modeltime Table
## # A tibble: 4 × 3
## .model_id .model .model_desc
## <int> <list> <chr>
## 1 1 <workflow> ARIMA(1,1,2)(2,0,0)[12] W/ XGBOOST ERRORS
## 2 2 <workflow> PROPHET W/ XGBOOST ERRORS
## 3 3 <workflow> XGBOOST
## 4 4 <workflow> RANDOMFOREST
# Calibrate every submodel on the hold-out test window
submodels_calibrated_tbl <- modeltime_calibrate(submodels_tbl, testing(splits))
submodels_calibrated_tbl
## # Modeltime Table
## # A tibble: 4 × 5
## .model_id .model .model_desc .type .calibration_da…
## <int> <list> <chr> <chr> <list>
## 1 1 <workflow> ARIMA(1,1,2)(2,0,0)[12] W/ XGBOOST ERRORS Test <tibble [5 × 4]>
## 2 2 <workflow> PROPHET W/ XGBOOST ERRORS Test <tibble [5 × 4]>
## 3 3 <workflow> XGBOOST Test <tibble [5 × 4]>
## 4 4 <workflow> RANDOMFOREST Test <tibble [5 × 4]>
# Test-set accuracy per submodel, best RMSE first
arrange(modeltime_accuracy(submodels_calibrated_tbl), rmse)
## # A tibble: 4 × 9
## .model_id .model_desc .type mae mape mase smape rmse rsq
## <int> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 ARIMA(1,1,2)(2,0,0)[12]… Test 32499. 3.20 0.568 3.21 37479. 0.876
## 2 2 PROPHET W/ XGBOOST ERRO… Test 52131. 5.27 0.911 5.21 64436. 0.963
## 3 3 XGBOOST Test 46464. 4.47 0.812 4.67 66341. 0.521
## 4 4 RANDOMFOREST Test 55565. 5.30 0.971 5.52 71791. 0.938
# Plot test-window forecasts against the full history (T replaced by TRUE)
submodels_calibrated_tbl %>%
  modeltime_forecast(
    new_data    = testing(splits),
    actual_data = data_prepared_tbl,
    keep_data   = TRUE
  ) %>%
  plot_modeltime_forecast(.facet_ncol = 2)
# Refit every calibrated submodel on the complete prepared history
submodels_refit_tbl <- modeltime_refit(submodels_calibrated_tbl, data_prepared_tbl)
## frequency = 12 observations per 1 year
# Forecast the 5 future months with the refit submodels (T replaced by TRUE)
submodels_refit_tbl %>%
  modeltime_forecast(
    new_data    = future_tbl,
    actual_data = data_prepared_tbl,
    keep_data   = TRUE
  ) %>%
  plot_modeltime_forecast(.facet_ncol = 2)
# Reference: https://cran.r-project.org/web/packages/modeltime.resample/vignettes/getting-started.html
# Rolling-origin CV plan: 5 slices, 36-month training window, 5-month
# assessment window, sliding forward FORECAST_HORIZON months per slice
resamples_tscv <- data_prepared_tbl %>%
  time_series_cv(
    date_var    = Date,
    initial     = "36 month",
    assess      = FORECAST_HORIZON,
    skip        = FORECAST_HORIZON,
    slice_limit = 5
  )
resamples_tscv
## # Time Series Cross Validation Plan
## # A tibble: 5 × 2
## splits id
## <list> <chr>
## 1 <split [36/5]> Slice1
## 2 <split [36/5]> Slice2
## 3 <split [36/5]> Slice3
## 4 <split [36/5]> Slice4
## 5 <split [36/5]> Slice5
# Visualize the cross-validation plan (T replaced by TRUE)
resamples_tscv %>%
  tk_time_series_cv_plan() %>%
  plot_time_series_cv_plan(Date, value, .facet_ncol = 2, .interactive = TRUE)
# Fit every submodel on each CV slice
resamples_fitted <- modeltime_fit_resamples(
  submodels_tbl,
  resamples = resamples_tscv,
  control   = control_resamples(verbose = FALSE)
)
resamples_fitted
## # Modeltime Table
## # A tibble: 4 × 4
## .model_id .model .model_desc .resample_resul…
## <int> <list> <chr> <list>
## 1 1 <workflow> ARIMA(1,1,2)(2,0,0)[12] W/ XGBOOST ERRORS <rsmp[+]>
## 2 2 <workflow> PROPHET W/ XGBOOST ERRORS <rsmp[+]>
## 3 3 <workflow> XGBOOST <rsmp[+]>
## 4 4 <workflow> RANDOMFOREST <rsmp[+]>
# Plot per-slice resample accuracy (T replaced by TRUE)
resamples_fitted %>%
  plot_modeltime_resamples(
    .point_size  = 3,
    .point_alpha = 0.8,
    .interactive = TRUE
  )
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
# Mean resample accuracy per model as an interactive table (T replaced by TRUE)
resamples_fitted %>%
  modeltime_resample_accuracy(summary_fns = mean) %>%
  table_modeltime_accuracy(.interactive = TRUE)
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
# Simple ensemble: unweighted mean of the submodel forecasts
# (optionally drop submodels first, e.g. filter(!.model_id %in% c(1)))
ensemble_fit_mean <- ensemble_average(submodels_tbl, type = "mean")
# Stacked ensemble: glmnet meta-learner tuned over a small grid on the
# out-of-sample resample predictions
ensemble_fit_lm <- resamples_fitted %>%
  ensemble_model_spec(
    model_spec = linear_reg(penalty = tune(), mixture = tune()) %>%
      set_engine("glmnet"),
    grid    = 2,
    control = control_grid(verbose = TRUE)
  )
## ── Tuning Model Specification ───────────────────────────────────
## ℹ Performing 5-Fold Cross Validation.
## i Fold1: preprocessor 1/1
## ✓ Fold1: preprocessor 1/1
## i Fold1: preprocessor 1/1, model 1/2
## ✓ Fold1: preprocessor 1/1, model 1/2
## i Fold1: preprocessor 1/1, model 1/2 (predictions)
## i Fold1: preprocessor 1/1, model 2/2
## ✓ Fold1: preprocessor 1/1, model 2/2
## i Fold1: preprocessor 1/1, model 2/2 (predictions)
## i Fold2: preprocessor 1/1
## ✓ Fold2: preprocessor 1/1
## i Fold2: preprocessor 1/1, model 1/2
## ✓ Fold2: preprocessor 1/1, model 1/2
## i Fold2: preprocessor 1/1, model 1/2 (predictions)
## i Fold2: preprocessor 1/1, model 2/2
## ✓ Fold2: preprocessor 1/1, model 2/2
## i Fold2: preprocessor 1/1, model 2/2 (predictions)
## i Fold3: preprocessor 1/1
## ✓ Fold3: preprocessor 1/1
## i Fold3: preprocessor 1/1, model 1/2
## ✓ Fold3: preprocessor 1/1, model 1/2
## i Fold3: preprocessor 1/1, model 1/2 (predictions)
## i Fold3: preprocessor 1/1, model 2/2
## ✓ Fold3: preprocessor 1/1, model 2/2
## i Fold3: preprocessor 1/1, model 2/2 (predictions)
## i Fold4: preprocessor 1/1
## ✓ Fold4: preprocessor 1/1
## i Fold4: preprocessor 1/1, model 1/2
## ✓ Fold4: preprocessor 1/1, model 1/2
## i Fold4: preprocessor 1/1, model 1/2 (predictions)
## i Fold4: preprocessor 1/1, model 2/2
## ✓ Fold4: preprocessor 1/1, model 2/2
## i Fold4: preprocessor 1/1, model 2/2 (predictions)
## i Fold5: preprocessor 1/1
## ✓ Fold5: preprocessor 1/1
## i Fold5: preprocessor 1/1, model 1/2
## ✓ Fold5: preprocessor 1/1, model 1/2
## i Fold5: preprocessor 1/1, model 1/2 (predictions)
## i Fold5: preprocessor 1/1, model 2/2
## ✓ Fold5: preprocessor 1/1, model 2/2
## i Fold5: preprocessor 1/1, model 2/2 (predictions)
## ✓ Finished tuning Model Specification.
## ℹ Model Parameters:
## # A tibble: 1 × 8
## penalty mixture .metric .estimator mean n std_err .config
## <dbl> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
## 1 0.00181 0.361 rmse standard 107140. 5 11450. Preprocessor1_Model1
## ℹ Prediction Error Comparison:
## # A tibble: 5 × 3
## .model_id rmse .model_desc
## <chr> <dbl> <chr>
## 1 1 128924. ARIMA(1,1,2)(2,0,0)[12] W/ XGBOOST ERRORS
## 2 2 116572. PROPHET W/ XGBOOST ERRORS
## 3 3 121334. XGBOOST
## 4 4 124179. RANDOMFOREST
## 5 ensemble 86134. ENSEMBLE (MODEL SPEC)
##
## ── Final Model ──────────────────────────────────────────────────
## ℹ Model Workflow:
## ══ Workflow [trained] ══════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: linear_reg()
##
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 0 Recipe Steps
##
## ── Model ───────────────────────────────────────────────────────────────────────
##
## Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = "gaussian", alpha = ~0.360970945999725)
##
## Df %Dev Lambda
## 1 0 0.00 281000
## 2 3 5.04 256100
## 3 3 11.23 233300
## 4 3 16.77 212600
## 5 3 21.72 193700
## 6 3 26.11 176500
## 7 4 30.03 160800
## 8 4 33.46 146500
## 9 4 36.45 133500
## 10 4 39.05 121700
## 11 4 41.30 110900
## 12 4 43.25 101000
## 13 4 44.95 92030
## 14 4 46.42 83860
## 15 4 47.69 76410
## 16 4 48.80 69620
## 17 3 49.78 63430
## 18 3 50.59 57800
## 19 3 51.29 52660
## 20 3 51.89 47990
## 21 3 52.41 43720
## 22 3 52.85 39840
## 23 3 53.24 36300
## 24 3 53.58 33070
## 25 3 53.87 30140
## 26 3 54.13 27460
## 27 3 54.36 25020
## 28 3 54.56 22800
## 29 3 54.74 20770
## 30 3 54.91 18930
## 31 3 55.05 17240
## 32 3 55.19 15710
## 33 3 55.31 14320
## 34 3 55.43 13050
## 35 3 55.54 11890
## 36 3 55.64 10830
## 37 2 55.71 9868
## 38 2 55.76 8992
## 39 2 55.80 8193
## 40 2 55.83 7465
## 41 2 55.86 6802
## 42 3 56.07 6198
## 43 3 56.40 5647
## 44 3 56.71 5145
## 45 3 57.00 4688
## 46 3 57.27 4272
##
## ...
## and 46 more lines.
##
## 5.951 sec elapsed
# Stacked ensemble: XGBoost meta-learner with tuned mtry / trees / learn_rate
ensemble_fit_xg <- resamples_fitted %>%
  ensemble_model_spec(
    model_spec = boost_tree(
      mtry       = tune(),
      trees      = tune(),
      learn_rate = tune()
    ) %>%
      set_engine("xgboost"),
    control = control_grid(verbose = TRUE)
  )
## ── Tuning Model Specification ───────────────────────────────────
## ℹ Performing 5-Fold Cross Validation.
## i Creating pre-processing data to finalize unknown parameter: mtry
## i Fold1: preprocessor 1/1
## ✓ Fold1: preprocessor 1/1
## i Fold1: preprocessor 1/1, model 1/6
## ✓ Fold1: preprocessor 1/1, model 1/6
## i Fold1: preprocessor 1/1, model 1/6 (predictions)
## i Fold1: preprocessor 1/1, model 2/6
## ✓ Fold1: preprocessor 1/1, model 2/6
## i Fold1: preprocessor 1/1, model 2/6 (predictions)
## i Fold1: preprocessor 1/1, model 3/6
## ✓ Fold1: preprocessor 1/1, model 3/6
## i Fold1: preprocessor 1/1, model 3/6 (predictions)
## i Fold1: preprocessor 1/1, model 4/6
## ✓ Fold1: preprocessor 1/1, model 4/6
## i Fold1: preprocessor 1/1, model 4/6 (predictions)
## i Fold1: preprocessor 1/1, model 5/6
## ✓ Fold1: preprocessor 1/1, model 5/6
## i Fold1: preprocessor 1/1, model 5/6 (predictions)
## i Fold1: preprocessor 1/1, model 6/6
## ✓ Fold1: preprocessor 1/1, model 6/6
## i Fold1: preprocessor 1/1, model 6/6 (predictions)
## i Fold2: preprocessor 1/1
## ✓ Fold2: preprocessor 1/1
## i Fold2: preprocessor 1/1, model 1/6
## ✓ Fold2: preprocessor 1/1, model 1/6
## i Fold2: preprocessor 1/1, model 1/6 (predictions)
## i Fold2: preprocessor 1/1, model 2/6
## ✓ Fold2: preprocessor 1/1, model 2/6
## i Fold2: preprocessor 1/1, model 2/6 (predictions)
## i Fold2: preprocessor 1/1, model 3/6
## ✓ Fold2: preprocessor 1/1, model 3/6
## i Fold2: preprocessor 1/1, model 3/6 (predictions)
## i Fold2: preprocessor 1/1, model 4/6
## ✓ Fold2: preprocessor 1/1, model 4/6
## i Fold2: preprocessor 1/1, model 4/6 (predictions)
## i Fold2: preprocessor 1/1, model 5/6
## ✓ Fold2: preprocessor 1/1, model 5/6
## i Fold2: preprocessor 1/1, model 5/6 (predictions)
## i Fold2: preprocessor 1/1, model 6/6
## ✓ Fold2: preprocessor 1/1, model 6/6
## i Fold2: preprocessor 1/1, model 6/6 (predictions)
## i Fold3: preprocessor 1/1
## ✓ Fold3: preprocessor 1/1
## i Fold3: preprocessor 1/1, model 1/6
## ✓ Fold3: preprocessor 1/1, model 1/6
## i Fold3: preprocessor 1/1, model 1/6 (predictions)
## i Fold3: preprocessor 1/1, model 2/6
## ✓ Fold3: preprocessor 1/1, model 2/6
## i Fold3: preprocessor 1/1, model 2/6 (predictions)
## i Fold3: preprocessor 1/1, model 3/6
## ✓ Fold3: preprocessor 1/1, model 3/6
## i Fold3: preprocessor 1/1, model 3/6 (predictions)
## i Fold3: preprocessor 1/1, model 4/6
## ✓ Fold3: preprocessor 1/1, model 4/6
## i Fold3: preprocessor 1/1, model 4/6 (predictions)
## i Fold3: preprocessor 1/1, model 5/6
## ✓ Fold3: preprocessor 1/1, model 5/6
## i Fold3: preprocessor 1/1, model 5/6 (predictions)
## i Fold3: preprocessor 1/1, model 6/6
## ✓ Fold3: preprocessor 1/1, model 6/6
## i Fold3: preprocessor 1/1, model 6/6 (predictions)
## i Fold4: preprocessor 1/1
## ✓ Fold4: preprocessor 1/1
## i Fold4: preprocessor 1/1, model 1/6
## ✓ Fold4: preprocessor 1/1, model 1/6
## i Fold4: preprocessor 1/1, model 1/6 (predictions)
## i Fold4: preprocessor 1/1, model 2/6
## ✓ Fold4: preprocessor 1/1, model 2/6
## i Fold4: preprocessor 1/1, model 2/6 (predictions)
## i Fold4: preprocessor 1/1, model 3/6
## ✓ Fold4: preprocessor 1/1, model 3/6
## i Fold4: preprocessor 1/1, model 3/6 (predictions)
## i Fold4: preprocessor 1/1, model 4/6
## ✓ Fold4: preprocessor 1/1, model 4/6
## i Fold4: preprocessor 1/1, model 4/6 (predictions)
## i Fold4: preprocessor 1/1, model 5/6
## ✓ Fold4: preprocessor 1/1, model 5/6
## i Fold4: preprocessor 1/1, model 5/6 (predictions)
## i Fold4: preprocessor 1/1, model 6/6
## ✓ Fold4: preprocessor 1/1, model 6/6
## i Fold4: preprocessor 1/1, model 6/6 (predictions)
## i Fold5: preprocessor 1/1
## ✓ Fold5: preprocessor 1/1
## i Fold5: preprocessor 1/1, model 1/6
## ✓ Fold5: preprocessor 1/1, model 1/6
## i Fold5: preprocessor 1/1, model 1/6 (predictions)
## i Fold5: preprocessor 1/1, model 2/6
## ✓ Fold5: preprocessor 1/1, model 2/6
## i Fold5: preprocessor 1/1, model 2/6 (predictions)
## i Fold5: preprocessor 1/1, model 3/6
## ✓ Fold5: preprocessor 1/1, model 3/6
## i Fold5: preprocessor 1/1, model 3/6 (predictions)
## i Fold5: preprocessor 1/1, model 4/6
## ✓ Fold5: preprocessor 1/1, model 4/6
## i Fold5: preprocessor 1/1, model 4/6 (predictions)
## i Fold5: preprocessor 1/1, model 5/6
## ✓ Fold5: preprocessor 1/1, model 5/6
## i Fold5: preprocessor 1/1, model 5/6 (predictions)
## i Fold5: preprocessor 1/1, model 6/6
## ✓ Fold5: preprocessor 1/1, model 6/6
## i Fold5: preprocessor 1/1, model 6/6 (predictions)
## ✓ Finished tuning Model Specification.
## ℹ Model Parameters:
## # A tibble: 1 × 9
## mtry trees learn_rate .metric .estimator mean n std_err .config
## <int> <int> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
## 1 1 1762 0.00209 rmse standard 83060. 5 13115. Preprocessor1_…
## ℹ Prediction Error Comparison:
## # A tibble: 5 × 3
## .model_id rmse .model_desc
## <chr> <dbl> <chr>
## 1 1 128924. ARIMA(1,1,2)(2,0,0)[12] W/ XGBOOST ERRORS
## 2 2 116572. PROPHET W/ XGBOOST ERRORS
## 3 3 121334. XGBOOST
## 4 4 124179. RANDOMFOREST
## 5 ensemble 52578. ENSEMBLE (MODEL SPEC)
##
## ── Final Model ──────────────────────────────────────────────────
## ℹ Model Workflow:
## ══ Workflow [trained] ══════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: boost_tree()
##
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 0 Recipe Steps
##
## ── Model ───────────────────────────────────────────────────────────────────────
## ##### xgb.Booster
## raw: 1.3 Mb
## call:
## xgboost::xgb.train(params = list(eta = 0.00209271548061252, max_depth = 6,
## gamma = 0, colsample_bytree = 1, colsample_bynode = 0.25,
## min_child_weight = 1, subsample = 1, objective = "reg:squarederror"),
## data = x$data, nrounds = 1762L, watchlist = x$watchlist,
## verbose = 0, nthread = 1)
## params (as set within xgb.train):
## eta = "0.00209271548061252", max_depth = "6", gamma = "0", colsample_bytree = "1", colsample_bynode = "0.25", min_child_weight = "1", subsample = "1", objective = "reg:squarederror", nthread = "1", validate_parameters = "TRUE"
## xgb.attributes:
## niter
## callbacks:
## cb.evaluation.log()
## # of features: 4
## niter: 1762
## nfeatures : 4
## evaluation_log:
## iter training_rmse
## 1 932120.44
## 2 930284.50
## ---
## 1761 52642.61
## 1762 52578.50
##
## 22.054 sec elapsed
# Gather the three ensembles into a single modeltime table
ensemble_tbl <- modeltime_table(
  ensemble_fit_mean,
  ensemble_fit_lm,
  ensemble_fit_xg
)
# Compare ensembles and individual submodels on the test window,
# sorted by error metrics, rendered as a markdown table
ensemble_tbl %>%
  combine_modeltime_tables(submodels_tbl) %>%
  modeltime_accuracy(testing(splits)) %>%
  arrange(rmse, mae, mape, mase) %>%
  knitr::kable()
## | .model_id | .model_desc | .type | mae | mape | mase | smape | rmse | rsq |
## |---|---|---|---|---|---|---|---|---|
## | 4 | ARIMA(1,1,2)(2,0,0)[12] W/ XGBOOST ERRORS | Test | 32498.55 | 3.200357 | 0.5677113 | 3.206721 | 37479.12 | 0.8764449 |
## | 1 | ENSEMBLE (MEAN): 4 MODELS | Test | 31470.17 | 2.950817 | 0.5497468 | 3.046988 | 50382.44 | 0.8443752 |
## | 2 | ENSEMBLE (GLMNET STACK): 4 MODELS | Test | 51249.20 | 5.022606 | 0.8952631 | 5.058998 | 61370.95 | 0.3851515 |
## | 3 | ENSEMBLE (XGBOOST STACK): 4 MODELS | Test | 53505.92 | 5.228635 | 0.9346854 | 5.347639 | 63400.46 | 0.7831948 |
## | 5 | PROPHET W/ XGBOOST ERRORS | Test | 52131.10 | 5.268918 | 0.9106689 | 5.210271 | 64435.97 | 0.9630310 |
## | 6 | XGBOOST | Test | 46463.83 | 4.473893 | 0.8116684 | 4.667059 | 66341.33 | 0.5212406 |
## | 7 | RANDOMFOREST | Test | 55565.22 | 5.296326 | 0.9706589 | 5.524929 | 71790.80 | 0.9378041 |
# Calibrate the ensembles and plot test-window forecasts with 95% intervals
# (T replaced by TRUE)
ensemble_tbl %>%
  modeltime_calibrate(testing(splits)) %>%
  modeltime_forecast(
    new_data      = testing(splits),
    actual_data   = data_prepared_tbl,
    keep_data     = TRUE,
    conf_by_id    = TRUE,
    conf_interval = 0.95
  ) %>%
  plot_modeltime_forecast(.facet_ncol = 2)
## Warning: The 'id' column in calibration data was not detected. Global Confidence
## Interval is being returned.
# Ensembles and submodels together on the test window (T replaced by TRUE)
ensemble_tbl %>%
  combine_modeltime_tables(submodels_tbl) %>%
  modeltime_calibrate(testing(splits)) %>%
  modeltime_forecast(
    new_data    = testing(splits),
    actual_data = data_prepared_tbl,
    keep_data   = TRUE
  ) %>%
  plot_modeltime_forecast(.facet_ncol = 2)
# Combined table of ensembles plus submodels, kept for later inspection
ensemble_tbl_all_model <- combine_modeltime_tables(ensemble_tbl, submodels_tbl)
# Refit the ensembles on the full history. Note the console warnings below:
# only the submodels are refit; the stacked meta-learners are not, because
# `resamples` is not supplied to modeltime_refit().
ensemble_refit_tbl <- modeltime_refit(ensemble_tbl, data_prepared_tbl)
## frequency = 12 observations per 1 year
## Warning in mdl_time_refit.mdl_time_ensemble_model_spec(...): 'resamples' not
## provided during refitting. Submodels will be refit, but the meta-learner will
## *not* be refit. You can provide 'resamples' via `modeltime_refit(object, data,
## resamples, control)`. Proceeding by refitting the submodels only.
## frequency = 12 observations per 1 year
## Warning in mdl_time_refit.mdl_time_ensemble_model_spec(...): 'resamples' not
## provided during refitting. Submodels will be refit, but the meta-learner will
## *not* be refit. You can provide 'resamples' via `modeltime_refit(object, data,
## resamples, control)`. Proceeding by refitting the submodels only.
## frequency = 12 observations per 1 year
# Forecast the future window with the refit ensembles (T replaced by TRUE)
ensemble_refit_tbl %>%
  modeltime_forecast(
    new_data    = future_tbl,
    actual_data = data_prepared_tbl,
    keep_data   = TRUE
  ) %>%
  plot_modeltime_forecast(.facet_ncol = 2)
## Warning: Expecting the following names to be in the data frame: .conf_hi, .conf_lo.
## Proceeding with '.conf_interval_show = FALSE' to visualize the forecast without confidence intervals.
## Alternatively, try using `modeltime_calibrate()` before forecasting to add confidence intervals.
# Second dataset: visualize the dolares series before modeling
# (F replaced by FALSE)
dolares %>%
  plot_time_series(Date, value, .facet_ncol = 3, .interactive = FALSE)
# Forecast horizon for the dolares series (overwrites the colones objects)
FORECAST_HORIZON <- 5

# Extend the dolares series with FORECAST_HORIZON empty future rows
full_data_tbl <- dolares %>%
  select(Date, value) %>%
  future_frame(
    .date_var   = Date,
    .length_out = FORECAST_HORIZON,
    .bind_data  = TRUE
  )
# Historical portion of the dolares series (rows with an observed value)
data_prepared_tbl <- filter(full_data_tbl, !is.na(value))

# Quick range / frequency diagnostics
tk_summary_diagnostics(data_prepared_tbl)
## tk_augment_timeseries_signature(): Using the following .date_var variable: Date
## # A tibble: 1 × 12
## n.obs start end units scale tzone diff.minimum diff.q1 diff.median
## <int> <date> <date> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 246 2001-02-01 2021-07-01 days month UTC 2419200 2592000 2678400
## # … with 3 more variables: diff.mean <dbl>, diff.q3 <dbl>, diff.maximum <dbl>
# Future portion: the empty rows appended by future_frame() (value is NA)
future_tbl <- filter(full_data_tbl, is.na(value))
# Train/test split for the dolares series.
# Fixed: `data_var =` is not a time_series_split() argument (it is
# `date_var`); the original only worked via auto-detection.
splits <- data_prepared_tbl %>%
  arrange(Date) %>%
  time_series_split(
    date_var   = Date,
    assess     = FORECAST_HORIZON,
    cumulative = TRUE
  )
## Using date_var: Date
splits
## <Analysis/Assess/Total>
## <241/5/246>
# Recipe 1 for the dolares series: calendar-signature features from Date;
# Date itself remains a predictor.
recipe_spec_1 <- recipe(value ~ ., training(splits)) %>%
  step_timeseries_signature(Date) %>%
  # Remove signature columns that add nothing for monthly data
  step_rm(matches("(.iso$)|(.xts)|(day)|(hour)|(minute)|(second)|(am.pm)|(week)")) %>%
  step_normalize(Date_index.num, Date_year) %>%
  step_mutate(Date_month = factor(Date_month, ordered = TRUE)) %>%
  step_dummy(all_nominal(), one_hot = TRUE)

recipe_spec_1 %>% prep() %>% juice() %>% glimpse()
## Rows: 241
## Columns: 30
## $ Date <date> 2001-02-01, 2001-03-01, 2001-04-01, 2001-05-01, 200…
## $ value <dbl> 21.8817, 24.1889, 24.6323, 30.7223, 30.6749, 31.9302…
## $ Date_index.num <dbl> -1.720507, -1.707312, -1.692703, -1.678565, -1.66395…
## $ Date_year <dbl> -1.656391, -1.656391, -1.656391, -1.656391, -1.65639…
## $ Date_half <int> 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2…
## $ Date_quarter <int> 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1, 1, 1, 2, 2, 2, 3…
## $ Date_month_01 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ Date_month_02 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Date_month_03 <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ Date_month_04 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Date_month_05 <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Date_month_06 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Date_month_07 <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Date_month_08 <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_09 <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_10 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_11 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_12 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_01 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_02 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Date_month.lbl_03 <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ Date_month.lbl_04 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Date_month.lbl_05 <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Date_month.lbl_06 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Date_month.lbl_07 <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Date_month.lbl_08 <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_09 <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_10 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_11 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_12 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
# Recipe 2: same feature engineering as recipe_spec_1, but the Date column
# is demoted from "predictor" to an "ID" role so engines that cannot handle
# date columns (xgboost, randomForest, kernlab) will ignore it.
recipe_spec_2 <- recipe_spec_1 %>%
  update_role(Date, new_role = "ID")

# Inspect the prepared (juiced) training data produced by the recipe.
recipe_spec_2 %>%
  prep() %>%
  juice() %>%
  glimpse()
## Rows: 241
## Columns: 30
## $ Date <date> 2001-02-01, 2001-03-01, 2001-04-01, 2001-05-01, 200…
## $ value <dbl> 21.8817, 24.1889, 24.6323, 30.7223, 30.6749, 31.9302…
## $ Date_index.num <dbl> -1.720507, -1.707312, -1.692703, -1.678565, -1.66395…
## $ Date_year <dbl> -1.656391, -1.656391, -1.656391, -1.656391, -1.65639…
## $ Date_half <int> 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2…
## $ Date_quarter <int> 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1, 1, 1, 2, 2, 2, 3…
## $ Date_month_01 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ Date_month_02 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Date_month_03 <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ Date_month_04 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Date_month_05 <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Date_month_06 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Date_month_07 <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Date_month_08 <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_09 <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_10 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_11 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_12 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_01 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_02 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Date_month.lbl_03 <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ Date_month.lbl_04 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Date_month.lbl_05 <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Date_month.lbl_06 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Date_month.lbl_07 <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Date_month.lbl_08 <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_09 <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_10 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_11 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_12 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
recipe_spec_1 %>% prep() %>% summary()
## # A tibble: 30 × 4
## variable type role source
## <chr> <chr> <chr> <chr>
## 1 Date date predictor original
## 2 value numeric outcome original
## 3 Date_index.num numeric predictor derived
## 4 Date_year numeric predictor derived
## 5 Date_half numeric predictor derived
## 6 Date_quarter numeric predictor derived
## 7 Date_month_01 numeric predictor derived
## 8 Date_month_02 numeric predictor derived
## 9 Date_month_03 numeric predictor derived
## 10 Date_month_04 numeric predictor derived
## # … with 20 more rows
recipe_spec_2 %>% prep() %>% summary()
## # A tibble: 30 × 4
## variable type role source
## <chr> <chr> <chr> <chr>
## 1 Date date ID original
## 2 value numeric outcome original
## 3 Date_index.num numeric predictor derived
## 4 Date_year numeric predictor derived
## 5 Date_half numeric predictor derived
## 6 Date_quarter numeric predictor derived
## 7 Date_month_01 numeric predictor derived
## 8 Date_month_02 numeric predictor derived
## 9 Date_month_03 numeric predictor derived
## 10 Date_month_04 numeric predictor derived
## # … with 20 more rows
# Boosted ARIMA: auto-ARIMA on the series with XGBoost fit to the residuals
# using the calendar features from recipe_spec_1 (Date kept as predictor).
wflw_fit_autoarima_boost <- workflow() %>%
  add_recipe(recipe_spec_1) %>%
  add_model(
    arima_boost(min_n = 2, learn_rate = 0.015) %>%
      set_engine(engine = "auto_arima_xgboost")
  ) %>%
  fit(training(splits))
## frequency = 12 observations per 1 year
# Plain Prophet model (trend + seasonality), fit with the date-based recipe.
wflw_fit_prophet <- workflow() %>%
  add_recipe(recipe_spec_1) %>%
  add_model(prophet_reg() %>% set_engine("prophet")) %>%
  fit(training(splits))
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
# XGBoost with learn_rate = 0.015.
# BUG FIX: the object name says 0.015 but the original set learn_rate = 0.15;
# aligned the value with the name and with the 0.015 / 0.1 / 0.3 sweep these
# three fits implement. (This model is commented out of submodels_tbl below,
# so the reported results are unaffected.)
wflw_fit_xgboost_0_015 <- workflow() %>%
  add_model(
    boost_tree(learn_rate = 0.015) %>% set_engine("xgboost")
  ) %>%
  add_recipe(recipe_spec_2) %>%
  fit(training(splits))
# XGBoost, learn_rate = 0.1 (Date excluded via recipe_spec_2's ID role).
wflw_fit_xgboost_0_1 <- workflow() %>%
  add_recipe(recipe_spec_2) %>%
  add_model(boost_tree(learn_rate = 0.1) %>% set_engine("xgboost")) %>%
  fit(training(splits))
# XGBoost, learn_rate = 0.3 (xgboost's default learning rate).
wflw_fit_xgboost_0_3 <- workflow() %>%
  add_recipe(recipe_spec_2) %>%
  add_model(boost_tree(learn_rate = 0.3) %>% set_engine("xgboost")) %>%
  fit(training(splits))
# Random forest, 1000 trees.
wflw_fit_rf_1000 <- workflow() %>%
  add_recipe(recipe_spec_2) %>%
  add_model(
    rand_forest(trees = 1000) %>%
      set_engine("randomForest")
  ) %>%
  fit(training(splits))
# Random forest, 500 trees.
wflw_fit_rf_500 <- workflow() %>%
  add_recipe(recipe_spec_2) %>%
  add_model(
    rand_forest(trees = 500) %>%
      set_engine("randomForest")
  ) %>%
  fit(training(splits))
# Random forest, 200 trees.
wflw_fit_rf_200 <- workflow() %>%
  add_recipe(recipe_spec_2) %>%
  add_model(
    rand_forest(trees = 200) %>%
      set_engine("randomForest")
  ) %>%
  fit(training(splits))
# Radial-basis-function SVM regression via kernlab, default hyperparameters.
wflw_fit_svm <- workflow() %>%
  add_recipe(recipe_spec_2) %>%
  add_model(svm_rbf() %>% set_engine("kernlab")) %>%
  fit(training(splits))
# Prophet + XGBoost on the residuals. All of Prophet's own seasonalities are
# disabled so the boosted trees learn seasonality from the recipe's calendar
# features instead.
# Fixes: removed the trailing comma inside prophet_boost() (it passed an
# empty positional argument), and replaced the F abbreviation with FALSE
# (T/F are ordinary variables in R and can be reassigned).
wflw_fit_prophet_boost <- workflow() %>%
  add_model(
    prophet_boost(
      seasonality_yearly = FALSE,
      seasonality_weekly = FALSE,
      seasonality_daily  = FALSE
    ) %>%
      set_engine("prophet_xgboost")
  ) %>%
  add_recipe(recipe_spec_1) %>%
  fit(training(splits))
# Collect the selected sub-models into a Modeltime Table; the commented-out
# entries were fit above but excluded from the final comparison/ensemble.
# NOTE(review): the .model_id values printed below are re-assigned 1..4 in
# table order — they do not match the #1..#9 tags in the trailing comments.
submodels_tbl <- modeltime_table(
wflw_fit_autoarima_boost,
#wflw_fit_prophet, #1
wflw_fit_prophet_boost, #2
#wflw_fit_xgboost_0_015, #3
#wflw_fit_xgboost_0_1, #4
wflw_fit_xgboost_0_3, #5
#wflw_fit_rf_1000, #6
wflw_fit_rf_500 #, #7
#wflw_fit_rf_200, #8
#wflw_fit_svm #9
)
submodels_tbl
## # Modeltime Table
## # A tibble: 4 × 3
## .model_id .model .model_desc
## <int> <list> <chr>
## 1 1 <workflow> ARIMA(0,1,0)(1,0,0)[12] WITH DRIFT W/ XGBOOST ERRORS
## 2 2 <workflow> PROPHET W/ XGBOOST ERRORS
## 3 3 <workflow> XGBOOST
## 4 4 <workflow> RANDOMFOREST
# Calibrate every sub-model on the hold-out test split; this attaches the
# out-of-sample residuals used for accuracy metrics and confidence intervals.
submodels_calibrated_tbl <- submodels_tbl %>%
  modeltime_calibrate(testing(splits))

submodels_calibrated_tbl
## # Modeltime Table
## # A tibble: 4 × 5
## .model_id .model .model_desc .type .calibration_da…
## <int> <list> <chr> <chr> <list>
## 1 1 <workflow> ARIMA(0,1,0)(1,0,0)[12] WITH DRIF… Test <tibble [5 × 4]>
## 2 2 <workflow> PROPHET W/ XGBOOST ERRORS Test <tibble [5 × 4]>
## 3 3 <workflow> XGBOOST Test <tibble [5 × 4]>
## 4 4 <workflow> RANDOMFOREST Test <tibble [5 × 4]>
# Out-of-sample accuracy for each sub-model, best RMSE first.
submodels_calibrated_tbl %>%
  modeltime_accuracy() %>%
  arrange(rmse)
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
## # A tibble: 4 × 9
## .model_id .model_desc .type mae mape mase smape rmse rsq
## <int> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 ARIMA(0,1,0)(1,0,0)[12]… Test 66.6 3.76 0.774 3.81 75.8 0.792
## 2 2 PROPHET W/ XGBOOST ERRO… Test 118. 6.53 1.37 6.83 141. 0.0919
## 3 3 XGBOOST Test 168. 9.34 1.95 9.93 192. NA
## 4 4 RANDOMFOREST Test 311. 17.6 3.62 19.4 322. 0.463
# Plot each sub-model's forecast over the test window against the actuals.
# Fix: T replaced by TRUE (T/F are reassignable variables, not keywords).
submodels_calibrated_tbl %>%
  modeltime_forecast(
    new_data    = testing(splits),
    actual_data = data_prepared_tbl,
    keep_data   = TRUE
  ) %>%
  plot_modeltime_forecast(
    .facet_ncol = 2
  )
# Refit all sub-models on the complete prepared history before producing
# the future-horizon forecast.
submodels_refit_tbl <- submodels_calibrated_tbl %>%
  modeltime_refit(data_prepared_tbl)
## frequency = 12 observations per 1 year
# Forecast the future horizon (future_tbl is built earlier from
# future_frame()) with the refit sub-models.
# Fix: T replaced by TRUE.
submodels_refit_tbl %>%
  modeltime_forecast(
    new_data    = future_tbl,
    actual_data = data_prepared_tbl,
    keep_data   = TRUE
  ) %>%
  plot_modeltime_forecast(
    .facet_ncol = 2
  )
https://cran.r-project.org/web/packages/modeltime.resample/vignettes/getting-started.html
# Rolling time-series cross-validation: 5 slices, each with a 36-month
# training window and a FORECAST_HORIZON-month assessment window, stepping
# forward by the forecast horizon between slices.
resamples_tscv <- time_series_cv(
  data        = data_prepared_tbl,
  date_var    = Date,
  initial     = "36 month",
  assess      = FORECAST_HORIZON,
  skip        = FORECAST_HORIZON,
  slice_limit = 5
)

resamples_tscv
## # Time Series Cross Validation Plan
## # A tibble: 5 × 2
## splits id
## <list> <chr>
## 1 <split [36/5]> Slice1
## 2 <split [36/5]> Slice2
## 3 <split [36/5]> Slice3
## 4 <split [36/5]> Slice4
## 5 <split [36/5]> Slice5
# Visualize the CV plan to confirm the train/assess windows per slice.
# Fix: T replaced by TRUE.
resamples_tscv %>%
  tk_time_series_cv_plan() %>%
  plot_time_series_cv_plan(
    Date,
    value,
    .facet_ncol  = 2,
    .interactive = TRUE
  )
# Fit every sub-model on each cross-validation slice; results are stored
# per model in the .resample_results list-column.
resamples_fitted <- submodels_tbl %>%
  modeltime_fit_resamples(
    resamples = resamples_tscv,
    control   = control_resamples(verbose = FALSE)
  )

resamples_fitted
## # Modeltime Table
## # A tibble: 4 × 4
## .model_id .model .model_desc .resample_resul…
## <int> <list> <chr> <list>
## 1 1 <workflow> ARIMA(0,1,0)(1,0,0)[12] WITH DRIFT W/ X… <rsmp[+]>
## 2 2 <workflow> PROPHET W/ XGBOOST ERRORS <rsmp[+]>
## 3 3 <workflow> XGBOOST <rsmp[+]>
## 4 4 <workflow> RANDOMFOREST <rsmp[+]>
# Plot per-slice resample metrics for each sub-model.
# Fix: T replaced by TRUE.
resamples_fitted %>%
  plot_modeltime_resamples(
    .point_size  = 3,
    .point_alpha = 0.8,
    .interactive = TRUE
  )
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
# Average accuracy across the CV slices, shown as an interactive table.
# Fix: T replaced by TRUE.
resamples_fitted %>%
  modeltime_resample_accuracy(summary_fns = mean) %>%
  table_modeltime_accuracy(.interactive = TRUE)
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
# Simple ensemble: unweighted mean of the sub-model forecasts.
# (The commented filter line shows how to exclude a sub-model by .model_id.)
ensemble_fit_mean <- submodels_tbl %>%
#filter(!.model_id %in% c(1))%>%
ensemble_average(type="mean")
# Stacked ensemble: a glmnet meta-learner trained on the sub-models'
# resample predictions, with penalty and mixture tuned over a 2-point grid.
ensemble_fit_lm <- resamples_fitted %>%
  ensemble_model_spec(
    model_spec = linear_reg(penalty = tune(), mixture = tune()) %>%
      set_engine("glmnet"),
    grid    = 2,
    control = control_grid(verbose = TRUE)
  )
## ── Tuning Model Specification ───────────────────────────────────
## ℹ Performing 5-Fold Cross Validation.
## i Fold1: preprocessor 1/1
## ✓ Fold1: preprocessor 1/1
## i Fold1: preprocessor 1/1, model 1/2
## ✓ Fold1: preprocessor 1/1, model 1/2
## i Fold1: preprocessor 1/1, model 1/2 (predictions)
## i Fold1: preprocessor 1/1, model 2/2
## ✓ Fold1: preprocessor 1/1, model 2/2
## i Fold1: preprocessor 1/1, model 2/2 (predictions)
## i Fold2: preprocessor 1/1
## ✓ Fold2: preprocessor 1/1
## i Fold2: preprocessor 1/1, model 1/2
## ✓ Fold2: preprocessor 1/1, model 1/2
## i Fold2: preprocessor 1/1, model 1/2 (predictions)
## i Fold2: preprocessor 1/1, model 2/2
## ✓ Fold2: preprocessor 1/1, model 2/2
## i Fold2: preprocessor 1/1, model 2/2 (predictions)
## i Fold3: preprocessor 1/1
## ✓ Fold3: preprocessor 1/1
## i Fold3: preprocessor 1/1, model 1/2
## ✓ Fold3: preprocessor 1/1, model 1/2
## i Fold3: preprocessor 1/1, model 1/2 (predictions)
## i Fold3: preprocessor 1/1, model 2/2
## ✓ Fold3: preprocessor 1/1, model 2/2
## i Fold3: preprocessor 1/1, model 2/2 (predictions)
## i Fold4: preprocessor 1/1
## ✓ Fold4: preprocessor 1/1
## i Fold4: preprocessor 1/1, model 1/2
## ✓ Fold4: preprocessor 1/1, model 1/2
## i Fold4: preprocessor 1/1, model 1/2 (predictions)
## i Fold4: preprocessor 1/1, model 2/2
## ✓ Fold4: preprocessor 1/1, model 2/2
## i Fold4: preprocessor 1/1, model 2/2 (predictions)
## i Fold5: preprocessor 1/1
## ✓ Fold5: preprocessor 1/1
## i Fold5: preprocessor 1/1, model 1/2
## ✓ Fold5: preprocessor 1/1, model 1/2
## i Fold5: preprocessor 1/1, model 1/2 (predictions)
## i Fold5: preprocessor 1/1, model 2/2
## ✓ Fold5: preprocessor 1/1, model 2/2
## i Fold5: preprocessor 1/1, model 2/2 (predictions)
## ✓ Finished tuning Model Specification.
## ℹ Model Parameters:
## # A tibble: 1 × 8
## penalty mixture .metric .estimator mean n std_err .config
## <dbl> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
## 1 0.000174 0.416 rmse standard 104. 5 14.0 Preprocessor1_Model1
## ℹ Prediction Error Comparison:
## # A tibble: 5 × 3
## .model_id rmse .model_desc
## <chr> <dbl> <chr>
## 1 1 178. ARIMA(0,1,0)(1,0,0)[12] WITH DRIFT W/ XGBOOST ERRORS
## 2 2 159. PROPHET W/ XGBOOST ERRORS
## 3 3 189. XGBOOST
## 4 4 206. RANDOMFOREST
## 5 ensemble 73.6 ENSEMBLE (MODEL SPEC)
##
## ── Final Model ──────────────────────────────────────────────────
## ℹ Model Workflow:
## ══ Workflow [trained] ══════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: linear_reg()
##
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 0 Recipe Steps
##
## ── Model ───────────────────────────────────────────────────────────────────────
##
## Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = "gaussian", alpha = ~0.415619745833101)
##
## Df %Dev Lambda
## 1 0 0.00 515.90
## 2 3 9.36 470.10
## 3 3 18.66 428.30
## 4 4 27.19 390.30
## 5 4 34.81 355.60
## 6 4 41.47 324.00
## 7 4 47.27 295.20
## 8 4 52.32 269.00
## 9 4 56.69 245.10
## 10 4 60.47 223.30
## 11 4 63.72 203.50
## 12 4 66.52 185.40
## 13 4 68.93 168.90
## 14 4 70.99 153.90
## 15 4 72.77 140.30
## 16 4 74.29 127.80
## 17 4 75.59 116.40
## 18 4 76.71 106.10
## 19 4 77.68 96.68
## 20 4 78.52 88.09
## 21 4 79.24 80.26
## 22 4 79.88 73.13
## 23 4 80.43 66.64
## 24 4 80.93 60.72
## 25 3 81.33 55.32
## 26 3 81.64 50.41
## 27 3 81.90 45.93
## 28 3 82.13 41.85
## 29 3 82.32 38.13
## 30 3 82.49 34.74
## 31 3 82.63 31.66
## 32 3 82.75 28.85
## 33 3 82.86 26.28
## 34 3 82.95 23.95
## 35 3 83.03 21.82
## 36 3 83.10 19.88
## 37 3 83.16 18.12
## 38 3 83.22 16.51
## 39 3 83.27 15.04
## 40 3 83.32 13.70
## 41 3 83.36 12.49
## 42 3 83.39 11.38
## 43 3 83.43 10.37
## 44 3 83.46 9.45
## 45 4 83.68 8.61
## 46 4 84.20 7.84
##
## ...
## and 52 more lines.
##
## 5.595 sec elapsed
# Stacked ensemble: an XGBoost meta-learner with mtry, trees and learn_rate
# tuned (default grid size) on the sub-models' resample predictions.
ensemble_fit_xg <- resamples_fitted %>%
  ensemble_model_spec(
    model_spec = boost_tree(
      mtry       = tune(),
      trees      = tune(),
      learn_rate = tune()
    ) %>%
      set_engine("xgboost"),
    control = control_grid(verbose = TRUE)
  )
## ── Tuning Model Specification ───────────────────────────────────
## ℹ Performing 5-Fold Cross Validation.
## i Creating pre-processing data to finalize unknown parameter: mtry
## i Fold1: preprocessor 1/1
## ✓ Fold1: preprocessor 1/1
## i Fold1: preprocessor 1/1, model 1/6
## ✓ Fold1: preprocessor 1/1, model 1/6
## i Fold1: preprocessor 1/1, model 1/6 (predictions)
## i Fold1: preprocessor 1/1, model 2/6
## ✓ Fold1: preprocessor 1/1, model 2/6
## i Fold1: preprocessor 1/1, model 2/6 (predictions)
## i Fold1: preprocessor 1/1, model 3/6
## ✓ Fold1: preprocessor 1/1, model 3/6
## i Fold1: preprocessor 1/1, model 3/6 (predictions)
## i Fold1: preprocessor 1/1, model 4/6
## ✓ Fold1: preprocessor 1/1, model 4/6
## i Fold1: preprocessor 1/1, model 4/6 (predictions)
## i Fold1: preprocessor 1/1, model 5/6
## ✓ Fold1: preprocessor 1/1, model 5/6
## i Fold1: preprocessor 1/1, model 5/6 (predictions)
## i Fold1: preprocessor 1/1, model 6/6
## ✓ Fold1: preprocessor 1/1, model 6/6
## i Fold1: preprocessor 1/1, model 6/6 (predictions)
## i Fold2: preprocessor 1/1
## ✓ Fold2: preprocessor 1/1
## i Fold2: preprocessor 1/1, model 1/6
## ✓ Fold2: preprocessor 1/1, model 1/6
## i Fold2: preprocessor 1/1, model 1/6 (predictions)
## i Fold2: preprocessor 1/1, model 2/6
## ✓ Fold2: preprocessor 1/1, model 2/6
## i Fold2: preprocessor 1/1, model 2/6 (predictions)
## i Fold2: preprocessor 1/1, model 3/6
## ✓ Fold2: preprocessor 1/1, model 3/6
## i Fold2: preprocessor 1/1, model 3/6 (predictions)
## i Fold2: preprocessor 1/1, model 4/6
## ✓ Fold2: preprocessor 1/1, model 4/6
## i Fold2: preprocessor 1/1, model 4/6 (predictions)
## i Fold2: preprocessor 1/1, model 5/6
## ✓ Fold2: preprocessor 1/1, model 5/6
## i Fold2: preprocessor 1/1, model 5/6 (predictions)
## i Fold2: preprocessor 1/1, model 6/6
## ✓ Fold2: preprocessor 1/1, model 6/6
## i Fold2: preprocessor 1/1, model 6/6 (predictions)
## i Fold3: preprocessor 1/1
## ✓ Fold3: preprocessor 1/1
## i Fold3: preprocessor 1/1, model 1/6
## ✓ Fold3: preprocessor 1/1, model 1/6
## i Fold3: preprocessor 1/1, model 1/6 (predictions)
## i Fold3: preprocessor 1/1, model 2/6
## ✓ Fold3: preprocessor 1/1, model 2/6
## i Fold3: preprocessor 1/1, model 2/6 (predictions)
## i Fold3: preprocessor 1/1, model 3/6
## ✓ Fold3: preprocessor 1/1, model 3/6
## i Fold3: preprocessor 1/1, model 3/6 (predictions)
## i Fold3: preprocessor 1/1, model 4/6
## ✓ Fold3: preprocessor 1/1, model 4/6
## i Fold3: preprocessor 1/1, model 4/6 (predictions)
## i Fold3: preprocessor 1/1, model 5/6
## ✓ Fold3: preprocessor 1/1, model 5/6
## i Fold3: preprocessor 1/1, model 5/6 (predictions)
## i Fold3: preprocessor 1/1, model 6/6
## ✓ Fold3: preprocessor 1/1, model 6/6
## i Fold3: preprocessor 1/1, model 6/6 (predictions)
## i Fold4: preprocessor 1/1
## ✓ Fold4: preprocessor 1/1
## i Fold4: preprocessor 1/1, model 1/6
## ✓ Fold4: preprocessor 1/1, model 1/6
## i Fold4: preprocessor 1/1, model 1/6 (predictions)
## i Fold4: preprocessor 1/1, model 2/6
## ✓ Fold4: preprocessor 1/1, model 2/6
## i Fold4: preprocessor 1/1, model 2/6 (predictions)
## i Fold4: preprocessor 1/1, model 3/6
## ✓ Fold4: preprocessor 1/1, model 3/6
## i Fold4: preprocessor 1/1, model 3/6 (predictions)
## i Fold4: preprocessor 1/1, model 4/6
## ✓ Fold4: preprocessor 1/1, model 4/6
## i Fold4: preprocessor 1/1, model 4/6 (predictions)
## i Fold4: preprocessor 1/1, model 5/6
## ✓ Fold4: preprocessor 1/1, model 5/6
## i Fold4: preprocessor 1/1, model 5/6 (predictions)
## i Fold4: preprocessor 1/1, model 6/6
## ✓ Fold4: preprocessor 1/1, model 6/6
## i Fold4: preprocessor 1/1, model 6/6 (predictions)
## i Fold5: preprocessor 1/1
## ✓ Fold5: preprocessor 1/1
## i Fold5: preprocessor 1/1, model 1/6
## ✓ Fold5: preprocessor 1/1, model 1/6
## i Fold5: preprocessor 1/1, model 1/6 (predictions)
## i Fold5: preprocessor 1/1, model 2/6
## ✓ Fold5: preprocessor 1/1, model 2/6
## i Fold5: preprocessor 1/1, model 2/6 (predictions)
## i Fold5: preprocessor 1/1, model 3/6
## ✓ Fold5: preprocessor 1/1, model 3/6
## i Fold5: preprocessor 1/1, model 3/6 (predictions)
## i Fold5: preprocessor 1/1, model 4/6
## ✓ Fold5: preprocessor 1/1, model 4/6
## i Fold5: preprocessor 1/1, model 4/6 (predictions)
## i Fold5: preprocessor 1/1, model 5/6
## ✓ Fold5: preprocessor 1/1, model 5/6
## i Fold5: preprocessor 1/1, model 5/6 (predictions)
## i Fold5: preprocessor 1/1, model 6/6
## ✓ Fold5: preprocessor 1/1, model 6/6
## i Fold5: preprocessor 1/1, model 6/6 (predictions)
## ✓ Finished tuning Model Specification.
## ℹ Model Parameters:
## # A tibble: 1 × 9
## mtry trees learn_rate .metric .estimator mean n std_err .config
## <int> <int> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
## 1 2 1026 0.0661 rmse standard 85.2 5 10.1 Preprocessor1_M…
## ℹ Prediction Error Comparison:
## # A tibble: 5 × 3
## .model_id rmse .model_desc
## <chr> <dbl> <chr>
## 1 1 178. ARIMA(0,1,0)(1,0,0)[12] WITH DRIFT W/ XGBOOST ERRORS
## 2 2 159. PROPHET W/ XGBOOST ERRORS
## 3 3 189. XGBOOST
## 4 4 206. RANDOMFOREST
## 5 ensemble 0.00117 ENSEMBLE (MODEL SPEC)
##
## ── Final Model ──────────────────────────────────────────────────
## ℹ Model Workflow:
## ══ Workflow [trained] ══════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: boost_tree()
##
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 0 Recipe Steps
##
## ── Model ───────────────────────────────────────────────────────────────────────
## ##### xgb.Booster
## raw: 1.1 Mb
## call:
## xgboost::xgb.train(params = list(eta = 0.0661406338030049, max_depth = 6,
## gamma = 0, colsample_bytree = 1, colsample_bynode = 0.5,
## min_child_weight = 1, subsample = 1, objective = "reg:squarederror"),
## data = x$data, nrounds = 1026L, watchlist = x$watchlist,
## verbose = 0, nthread = 1)
## params (as set within xgb.train):
## eta = "0.0661406338030049", max_depth = "6", gamma = "0", colsample_bytree = "1", colsample_bynode = "0.5", min_child_weight = "1", subsample = "1", objective = "reg:squarederror", nthread = "1", validate_parameters = "TRUE"
## xgb.attributes:
## niter
## callbacks:
## cb.evaluation.log()
## # of features: 4
## niter: 1026
## nfeatures : 4
## evaluation_log:
## iter training_rmse
## 1 1345.790771
## 2 1262.893677
## ---
## 1025 0.001168
## 1026 0.001168
##
## 20.662 sec elapsed
# Collect the three ensembles (mean, glmnet stack, xgboost stack) into
# their own Modeltime Table.
ensemble_tbl <- modeltime_table(
  ensemble_fit_mean,
  ensemble_fit_lm,
  ensemble_fit_xg
)
# Test-set accuracy of ensembles and sub-models together, rendered as a
# markdown table and sorted best-first by RMSE (then MAE, MAPE, MASE).
ensemble_tbl %>%
  combine_modeltime_tables(submodels_tbl) %>%
  modeltime_accuracy(testing(splits)) %>%
  arrange(rmse, mae, mape, mase) %>%
  knitr::kable()
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
| .model_id | .model_desc | .type | mae | mape | mase | smape | rmse | rsq |
|---|---|---|---|---|---|---|---|---|
| 4 | ARIMA(0,1,0)(1,0,0)[12] WITH DRIFT W/ XGBOOST ERRORS | Test | 66.62842 | 3.755342 | 0.7740205 | 3.805410 | 75.78838 | 0.7921893 |
| 3 | ENSEMBLE (XGBOOST STACK): 4 MODELS | Test | 91.92276 | 5.117643 | 1.0678641 | 5.268277 | 105.26510 | 0.4953768 |
| 5 | PROPHET W/ XGBOOST ERRORS | Test | 118.03429 | 6.525348 | 1.3712011 | 6.833269 | 141.17847 | 0.0918866 |
| 2 | ENSEMBLE (GLMNET STACK): 4 MODELS | Test | 150.57993 | 8.410085 | 1.7492828 | 8.794916 | 168.50830 | 0.8190488 |
| 1 | ENSEMBLE (MEAN): 4 MODELS | Test | 153.85243 | 8.552809 | 1.7872993 | 9.045627 | 175.44023 | 0.6300985 |
| 6 | XGBOOST | Test | 168.01570 | 9.335913 | 1.9518337 | 9.930132 | 192.05816 | NA |
| 7 | RANDOMFOREST | Test | 311.34186 | 17.576806 | 3.6168497 | 19.370058 | 322.35502 | 0.4626032 |
# Ensemble forecasts over the test window with 95% confidence intervals.
# conf_by_id only applies to panel data with an id column; the warning
# emitted below confirms a global interval is returned for this series.
# Fix: T replaced by TRUE.
ensemble_tbl %>%
  modeltime_calibrate(testing(splits)) %>%
  modeltime_forecast(
    new_data      = testing(splits),
    actual_data   = data_prepared_tbl,
    keep_data     = TRUE,
    conf_by_id    = TRUE,
    conf_interval = 0.95
  ) %>%
  plot_modeltime_forecast(
    .facet_ncol = 2
  )
## Warning: The 'id' column in calibration data was not detected. Global Confidence
## Interval is being returned.
# Test-window forecasts for ensembles AND sub-models on one plot.
# Fix: T replaced by TRUE.
ensemble_tbl %>%
  combine_modeltime_tables(submodels_tbl) %>%
  modeltime_calibrate(testing(splits)) %>%
  modeltime_forecast(
    new_data    = testing(splits),
    actual_data = data_prepared_tbl,
    keep_data   = TRUE
  ) %>%
  plot_modeltime_forecast(
    .facet_ncol = 2
  )
# Combined table of ensembles + sub-models.
# NOTE(review): ensemble_tbl_all_model is never used after this point — the
# refit and final forecast below operate on ensemble_tbl only. Confirm
# whether the combined table was meant to be refit instead.
ensemble_tbl_all_model<-ensemble_tbl%>%
combine_modeltime_tables(submodels_tbl)
# Refit the three ensembles on the full history. Without a `resamples`
# argument only the submodels are refit; the stacked meta-learners are
# kept as-is (see the warnings emitted below).
ensemble_refit_tbl <- ensemble_tbl%>%
modeltime_refit(data_prepared_tbl)
## frequency = 12 observations per 1 year
## Warning in mdl_time_refit.mdl_time_ensemble_model_spec(...): 'resamples' not
## provided during refitting. Submodels will be refit, but the meta-learner will
## *not* be refit. You can provide 'resamples' via `modeltime_refit(object, data,
## resamples, control)`. Proceeding by refitting the submodels only.
## frequency = 12 observations per 1 year
## Warning in mdl_time_refit.mdl_time_ensemble_model_spec(...): 'resamples' not
## provided during refitting. Submodels will be refit, but the meta-learner will
## *not* be refit. You can provide 'resamples' via `modeltime_refit(object, data,
## resamples, control)`. Proceeding by refitting the submodels only.
## frequency = 12 observations per 1 year
# Forecast the future horizon with the refit ensembles. No calibration was
# applied after refitting, so no confidence intervals are available — the
# warning below notes that plotting proceeds without them.
# Fix: T replaced by TRUE.
ensemble_refit_tbl %>%
  modeltime_forecast(
    new_data    = future_tbl,
    actual_data = data_prepared_tbl,
    keep_data   = TRUE
  ) %>%
  plot_modeltime_forecast(
    .facet_ncol = 2
  )
## Warning: Expecting the following names to be in the data frame: .conf_hi, .conf_lo.
## Proceeding with '.conf_interval_show = FALSE' to visualize the forecast without confidence intervals.
## Alternatively, try using `modeltime_calibrate()` before forecasting to add confidence intervals.